## Oatley 2011 IO

library(foreign) 
library(Amelia)
library(mi)
library(hot.deck)
library(mice)
library(mirt)
library(miceadds)

## Load original dataset
o2011 <- read.dta("O2011 IO Rep Data.dta")
head(o2011)
dim(o2011)

## Drop ID vars
o2011$ctylabel <- NULL

## How many variables? 49: no reduction necessary
dim(o2011)

## Imputation
## What is average percentage of missing data?
NAs <- function(x) {
    as.vector(apply(x, 2, function(x) length(which(is.na(x)))))
    }
NAs(o2011)
mean(NAs(o2011)/nrow(o2011))*100

## Thus: 26 imputations
## Note: already lags for polityiv_update2, gatt_wto_new, newtar
set.seed(02138)
o2011.out <- amelia(o2011, m = 26, ts = "date", cs = "country", polytime = 3, lags = "inter1", empri = 0.01*nrow(o2011))

write.amelia(obj= o2011.out, file.stem = "O2011 IO Imp Data", format = "dta", separate = FALSE)

## m = 5
set.seed(02138)
o2011.out.5 <- amelia(o2011, m = 5, ts = "date", cs = "country", polytime = 3, lags = "inter1", empri = 0.01*nrow(o2011))

write.amelia(obj= o2011.out.5, file.stem = "O2011 Imp Data 5", format = "dta", separate = FALSE)

## m = % incomplete observations
complete <- complete.cases(o2011)
sum(!complete)/nrow(o2011)*100
## Thus: 93 imputations
set.seed(02138)
o2011.out.93 <- amelia(o2011, m = 93, ts = "date", cs = "country", polytime = 3, lags = "inter1", empri = 0.01*nrow(o2011))
write.amelia(obj= o2011.out.93, file.stem = "O2011 Imp Data 93", format = "dta", separate = FALSE)

## HD
o2011.out.hd <- hot.deck(o2011, m = 26)
o2011.out.hd <- hd2amelia(o2011.out.hd)
write.amelia(obj= o2011.out.hd, file.stem = "O2011 Imp Data HD", format = "dta", separate = FALSE)

## MICE
o2011_mice <- mice(o2011, m = 26)
datlist <- mids2datlist(o2011_mice)
o2011_amelia <- list( "imputations"= datlist)
class(o2011_amelia) <- "amelia"
write.amelia(obj= o2011_amelia, file.stem = "O2011 Imp Data MICE", format = "dta", separate = FALSE)